
**********
* Readme *
**********

* This script aggregates income and expense items.

* Note: Original POF sample size: 
*  57 920 household units
*  58 039 families
* 178 431 individuals

* Note: Constant prices and population totals as of 15 jan 2018.


* Root folder (PATH TO BE DEFINED BY THE USER)
**********************************************
clear all
global analysis "C:/***/replication_package"


* Timestamped log
*****************
global today = strofreal(date(c(current_date), "DMY"), "%tdYYNNDD")
log using "${analysis}/code/logs/2_2_build_pof_fam_inc_exp_${today}.smcl", replace


***************************************************************************
* DESPESA COLETIVA & CADERNETA COLETIVA [ind_id, with FAMILY as a member] *
***************************************************************************

* Import
use          "${analysis}/data/source_files/pof/despesa_coletiva.dta", clear
append using "${analysis}/data/source_files/pof/caderneta_coletiva.dta"

* Code selected variables as string
ds COD_UPA NUM_DOM NUM_UC

foreach var in `r(varlist)' {
	tostring `var', replace
}

* Family unit identifier
gen fam_id = COD_UPA + "-" + NUM_DOM + "-" + NUM_UC
label var fam_id "Family unit identifier"

* Individual identifier
gen ind_id = COD_UPA + "-" + NUM_DOM + "-" + NUM_UC + "-FAMILY"
label var ind_id "Individual identifier"
sort ind_id V9001


* Collective expenses, monthly average expense by item
******************************************************

* Note: Number of months V9011 is meaningful for QUADRO 10 and 19 only.

* General collective expenses
gen       exp_value = (V8000_DEFLA         * FATOR_ANUALIZACAO)/12
replace   exp_value = (V8000_DEFLA * V9011 * FATOR_ANUALIZACAO)/12 if (QUADRO == 10 | QUADRO == 19)
label var exp_value "Value of expense item, monthly average"

* INSS relative to domestic worker hired		
gen       exp_value_inss = (V1904_DEFLA * V9011 * FATOR_ANUALIZACAO)/12
replace   exp_value_inss = 0 if missing(exp_value_inss)
label var exp_value_inss "Value of expense item, INSS, monthly average"

* Wrap up and export (duplicates allowed here)
**********************************************
global export_set ///
  fam_id ind_id V9001 V9002 exp_value exp_value_inss

keep  $export_set
order $export_set
sort  fam_id
compress
describe
save "${analysis}/data/2_2_pof_despesa_caderneta_coletiva_summary.dta", replace


********************************************************
* DESPESA INDIVIDUAL [ind_id, with FAMILY as a member] *
********************************************************

* Import
use "${analysis}/data/source_files/pof/despesa_individual.dta", clear

* Code selected variables as string
ds COD_UPA NUM_DOM NUM_UC COD_INFORMANTE

foreach var in `r(varlist)' {
	tostring `var', replace
}

* Family unit identifier
gen fam_id = COD_UPA + "-" + NUM_DOM + "-" + NUM_UC
label var fam_id "Family unit identifier"

* Individual identifier
gen ind_id = COD_UPA + "-" + NUM_DOM + "-" + NUM_UC + "-" + COD_INFORMANTE
label var ind_id "Individual identifier"
sort ind_id V9001


* Individual expenses, monthly average expense by item
******************************************************

* Note: Number of months V9011 is meaningful for QUADROS 44, 47, 48, 49 and 50.

* General individual expenses
gen       exp_value = (V8000_DEFLA         * FATOR_ANUALIZACAO)/12
replace   exp_value = (V8000_DEFLA * V9011 * FATOR_ANUALIZACAO)/12 if (QUADRO == 44 | QUADRO == 47 | QUADRO == 48 | QUADRO == 49 | QUADRO == 50)
label var exp_value "Value of expense item, monthly average"

* Housekeeping
keep fam_id ind_id V9001 V9002 exp_value

* Bring collective expenses in
append using "${analysis}/data/2_2_pof_despesa_caderneta_coletiva_summary.dta"
order fam_id ind_id V9001 V9002 exp_value exp_value_inss


* Individual expenses, monthly average expense by category
**********************************************************

* Import categories
merge m:1 V9001 using "${analysis}/data/1_3_aux_pof_map.dta", keepusing(exp_cat code)
drop if _merge == 2
keep  fam_id ind_id V9001 code V9002 exp_cat exp_value exp_value_inss 
order fam_id ind_id V9001 code V9002 exp_cat exp_value exp_value_inss
sort  ind_id V9001

* Credit expenses
bysort ind_id: egen temp = sum(exp_value) if inlist(V9002, 3, 4)
bysort ind_id: egen exp_credit = mean(temp)
drop temp

* Non monetary expenses
bysort ind_id: egen temp = sum(exp_value) if inlist(V9002, 7, 8, 9, 10, 11)
bysort ind_id: egen exp_non_monet = mean(temp)
drop temp

* Domestic maintenance (to be discounted from estimated home rent)
bysort ind_id: egen temp = sum(exp_value) if (inrange(V9002, 1, 6)) & (inrange(code, 8001, 8024) | inrange(code, 8026, 8068) | inrange(code, 8026, 8068) | inrange(code, 12005, 12008) | inrange(code, 12010, 12015) | inrange(code, 12017, 12020) | inrange(code, 12023, 12025) | inrange(code, 12027, 12036) | inlist(code, 8999, 10006, 10011, 12999))
bysort ind_id: egen exp_domest_maint = mean(temp)
drop temp

* Individual monthly bill by category of expense
tab exp_cat

bysort ind_id: egen temp = sum(exp_value) if exp_cat == "Clothing"
bysort ind_id: egen exp_cloth = mean(temp)
drop temp

bysort ind_id: egen temp = sum(exp_value) if exp_cat == "Debt repayment"
bysort ind_id: egen exp_debt = mean(temp)
drop temp

bysort ind_id: egen temp = sum(exp_value) if exp_cat == "Education"
bysort ind_id: egen exp_educ = mean(temp)
drop temp

bysort ind_id: egen temp = sum(exp_value) if exp_cat == "Food"
bysort ind_id: egen exp_food = mean(temp)
drop temp

bysort ind_id: egen temp = sum(exp_value) if exp_cat == "Healthcare"
bysort ind_id: egen exp_health = mean(temp)
drop temp

bysort ind_id: egen temp = sum(exp_value) if exp_cat == "Housing"
bysort ind_id: egen exp_house = mean(temp)
drop temp

bysort ind_id: egen temp = sum(exp_value) if exp_cat == "Investment"
bysort ind_id: egen exp_inv = mean(temp)
drop temp

bysort ind_id: egen temp = sum(exp_value) if exp_cat == "Medicine"
bysort ind_id: egen exp_medicine = mean(temp)
drop temp

bysort ind_id: egen temp = sum(exp_value) if exp_cat == "Misc"
bysort ind_id: egen exp_misc = mean(temp)
drop temp

bysort ind_id: egen temp = sum(exp_value) if exp_cat == "Personal"
bysort ind_id: egen exp_pers = mean(temp)
drop temp

bysort ind_id: egen temp = sum(exp_value) if exp_cat == "Taxes"
bysort ind_id: egen exp_tax = mean(temp)
drop temp

bysort ind_id: egen temp = sum(exp_value) if exp_cat == "Transport"
bysort ind_id: egen exp_transp = mean(temp)
drop temp

bysort ind_id: egen temp = sum(exp_value) if exp_cat == "Work taxes"
bysort ind_id: egen exp_work_tax = mean(temp)
drop temp

* Missing values to zeros
drop exp_cat
ds exp_*
foreach var in `r(varlist)' {
  replace `var' = 0 if missing(`var')
}

* INSS as tax expense
bysort ind_id: egen exp_value_inss_family = sum(exp_value_inss)
replace exp_tax = exp_tax + exp_value_inss_family
drop exp_value_inss exp_value_inss_family


* Wrap up and export
********************
label var exp_credit       "Expenses on credit, individual, monthly average" 
label var exp_non_monet    "Non monetary expenses, individual, monthly average" 
label var exp_domest_maint "To be discounted from estimated home rent"

label var exp_cloth        "Clothing expenses, individual, monthly average" 
label var exp_debt         "Debt repayment expenses, individual, monthly average" 
label var exp_educ         "Education expenses, individual, monthly average" 
label var exp_food         "Food expenses, individual, monthly average" 
label var exp_health       "Healthcare expenses (ex medicine), individual, monthly average" 
label var exp_house        "House expenses, individual, monthly average" 
label var exp_inv          "Investment expenses, individual, monthly average" 
label var exp_medicine     "Medicine expenses, individual, monthly average" 
label var exp_misc         "Other expenses, individual, monthly average" 
label var exp_pers         "Personal expenses and higiene, individual, monthly average" 
label var exp_tax          "Other taxes, individual, monthly average" 
label var exp_transp       "Transport expenses, individual, monthly average" 
label var exp_work_tax     "Work taxes, individual, monthly average" 

global export_set ///
  fam_id ind_id exp_credit exp_non_monet exp_domest_maint ///
  exp_cloth exp_debt exp_educ exp_food exp_health exp_house exp_inv exp_medicine exp_misc exp_pers exp_tax exp_transp exp_work_tax

keep  $export_set
order $export_set
sort  fam_id ind_id
duplicates drop
compress
describe
save "${analysis}/data/2_2_pof_despesa_individual_coletiva.dta", replace


******************************************************
* ALUGUEL ESTIMADO [ind_id, with FAMILY as a member] *
******************************************************

* Import
use "${analysis}/data/source_files/pof/aluguel_estimado.dta", clear

* Code selected variables as string
ds COD_UPA NUM_DOM NUM_UC

foreach var in `r(varlist)' {
	tostring `var', replace
}

* Family unit identifier
gen fam_id = COD_UPA + "-" + NUM_DOM + "-" + NUM_UC
label var fam_id "Family unit identifier"

* Individual identifier
gen ind_id = COD_UPA + "-" + NUM_DOM + "-" + NUM_UC + "-FAMILY"
label var ind_id "Individual identifier"
sort ind_id V9001


* Estimated rent (monthly average over a year)
**********************************************
gen est_rent = (V8000_DEFLA * V9011 * FATOR_ANUALIZACAO)/12
label var est_rent "Estimated rent value, monthly average"

* Wrap up and export
********************
global export_set ///
  fam_id ind_id est_rent

keep  $export_set
order $export_set
sort fam_id ind_id
duplicates drop
compress
describe
save "${analysis}/data/2_2_pof_aluguel_estimado_summary.dta", replace


************************************************************
* FROM INDIVIDUAL TO FAMILY EXPENSES (ex-domestic workers) *
************************************************************

use "${analysis}/data/2_1_pof_attributes.dta", clear
drop if family_position == 900
keep fam_id ind_id
sort fam_id ind_id

merge 1:1 ind_id using "${analysis}/data/2_2_pof_despesa_individual_coletiva.dta", nogenerate
sort fam_id ind_id

merge 1:1 ind_id using "${analysis}/data/2_2_pof_aluguel_estimado_summary.dta", nogenerate
sort fam_id ind_id

describe

ds exp_* est_rent
foreach var in `r(varlist)' {
  bysort fam_id: egen fam_`var' = sum(`var')
}


* Wrap up and export
********************
label var fam_exp_credit        "Credit expenses, family total, monthly average" 
label var fam_exp_non_monet     "Non monetary expenses, family total, monthly average" 
label var fam_exp_domest_maint  "To be discounted from estimated home rent"

label var fam_exp_cloth         "Clothing expenses, family total, monthly average" 
label var fam_exp_debt          "Debt repayment expenses, family total, monthly average" 
label var fam_exp_educ          "Education expenses, family total, monthly average" 
label var fam_exp_food          "Food expenses, family total, monthly average" 
label var fam_exp_health        "Healthcare expenses (ex-medicine), family total, monthly average" 
label var fam_exp_house         "House expenses, family total, monthly average" 
label var fam_exp_inv           "Investment expenses, family total, monthly average" 
label var fam_exp_medicine      "Medicine expenses, family total, monthly average" 
label var fam_exp_misc          "Miscellaneous expenses, family total, monthly average" 
label var fam_exp_pers          "Personal expenses and higiene, family total, monthly average" 
label var fam_exp_tax           "Other taxes, family total, monthly average" 
label var fam_exp_transp        "Transport expenses, family total, monthly average" 
label var fam_exp_work_tax      "Work taxes, family total, monthly average" 

label var fam_est_rent          "Estimated rent, monthly average" 

global export_set fam_*
keep  $export_set
order $export_set
sort fam_id
duplicates drop
compress
describe
save "${analysis}/data/2_2_pof_family_expenses.dta", replace


***********************************
* RENDIMENTO DO TRABALHO [ind_id] *
***********************************

* Import
use "${analysis}/data/source_files/pof/rendimento_trabalho.dta", clear

* Code selected variables as string
ds COD_UPA NUM_DOM NUM_UC COD_INFORMANTE

foreach var in `r(varlist)' {
	tostring `var', replace
}

* Family unit identifier
gen fam_id = COD_UPA + "-" + NUM_DOM + "-" + NUM_UC
label var fam_id "Family unit identifier"

* Individual identifier
gen ind_id = COD_UPA + "-" + NUM_DOM + "-" + NUM_UC + "-" + COD_INFORMANTE
label var ind_id "Individual identifier"


* Individual labor income, monthly average by item
**************************************************
gen inc_work_value = (V8500_DEFLA * V9011)/12

* Taxes and deductions
replace V531112_DEFLA = 0 if missing(V531112_DEFLA)
replace V531122_DEFLA = 0 if missing(V531122_DEFLA)
replace V531132_DEFLA = 0 if missing(V531132_DEFLA)

gen exp_work_tax_value = ((V531112_DEFLA + V531122_DEFLA + V531132_DEFLA) * V9011)/12

* Main job income
gen     inc_work_main_t = inc_work_value if SUB_QUADRO == 1
replace inc_work_main_t = 0              if SUB_QUADRO != 1

bysort ind_id: egen inc_work_main = sum(inc_work_main_t)
drop inc_work_main_t

* Main job taxes
gen     exp_work_main_tax_t = exp_work_tax_value if SUB_QUADRO == 1
replace exp_work_main_tax_t = 0                  if SUB_QUADRO != 1

bysort ind_id: egen exp_work_main_tax = sum(exp_work_main_tax_t)
drop exp_work_main_tax_t

* Sec jobs income
gen     inc_work_sec_t = inc_work_value if SUB_QUADRO == 2
replace inc_work_sec_t = 0              if SUB_QUADRO != 2

bysort ind_id: egen inc_work_sec = sum(inc_work_sec_t)
drop inc_work_sec_t

* Sec jobs taxes
gen     exp_work_sec_tax_t = exp_work_tax_value if SUB_QUADRO == 2
replace exp_work_sec_tax_t = 0                  if SUB_QUADRO != 2

bysort ind_id: egen exp_work_sec_tax = sum(exp_work_sec_tax_t)
drop exp_work_sec_tax_t


* Wrap up and export
********************
label var inc_work_main     "Main job, gross income, individual, monthly average"
label var exp_work_main_tax "Main job, taxes, individual, monthly average"
label var inc_work_sec      "Secondary jobs, gross income, individual, monthly average"
label var exp_work_sec_tax  "Secondary jobs, taxes, individual, monthly average"

global export_set ///
  fam_id ind_id inc_work_main exp_work_main_tax inc_work_sec exp_work_sec_tax
 
keep  $export_set
order $export_set
sort  ind_id
duplicates drop
compress
describe
save "${analysis}/data/2_2_pof_rendimento_trabalho_summary.dta", replace


*******************************
* OUTROS RENDIMENTOS [ind_id] *
*******************************

* Import
use "${analysis}/data/source_files/pof/outros_rendimentos.dta", clear

* Code selected variables as string
ds COD_UPA NUM_DOM NUM_UC COD_INFORMANTE

foreach var in `r(varlist)' {
	tostring `var', replace
}

* Family unit identifier
gen fam_id = COD_UPA + "-" + NUM_DOM + "-" + NUM_UC
label var fam_id "Family unit identifier"

* Individual identifier
gen ind_id = COD_UPA + "-" + NUM_DOM + "-" + NUM_UC + "-" + COD_INFORMANTE
label var ind_id "Individual identifier"


*******************************************
* General individual other income sources *
*******************************************

* Value of other income itens (monthly average)
gen     inc_other_value = (V8500_DEFLA         * FATOR_ANUALIZACAO)/12
replace inc_other_value = (V8500_DEFLA * V9011 * FATOR_ANUALIZACAO)/12 if (QUADRO == 54)

* Value of individual deduction itens (monthly average)
gen     exp_other_deduction_value = (V8501_DEFLA         * FATOR_ANUALIZACAO)/12
replace exp_other_deduction_value = (V8501_DEFLA * V9011 * FATOR_ANUALIZACAO)/12 if (QUADRO == 54)


* Individual other income, monthly average by category
******************************************************

* Import categories
merge m:1 V9001 using "${analysis}/data/1_3_aux_pof_map.dta", keepusing(inc_cat code)
drop if _merge == 2
keep  fam_id ind_id V9001 code inc_cat inc_other_value exp_other_deduction_value 
order fam_id ind_id V9001 code inc_cat inc_other_value exp_other_deduction_value 
sort  ind_id V9001

tab inc_cat


* OTHER WORK INCOME (extra hours, performance pay, other bonuses)
*****************************************************************
bysort ind_id: egen temp = sum(inc_other_value) if inc_cat == "Other work income"
bysort ind_id: egen inc_other_work = mean(temp)
drop temp


* TRANSFERS (private and government transfers)
**********************************************
bysort ind_id: egen temp = sum(inc_other_value) if inc_cat == "Transfers"
bysort ind_id: egen inc_other_transf = mean(temp)
drop temp


* ASSET SALES (non financial)
*****************************
bysort ind_id: egen temp = sum(inc_other_value) if inc_cat == "Asset sale"
bysort ind_id: egen inc_other_asset_sale = mean(temp)
drop temp


* BORROWING
***********
bysort ind_id: egen temp = sum(inc_other_value) if inc_cat == "Borrowing"
bysort ind_id: egen inc_other_borrowing = mean(temp)
drop temp


* RENT (financial and non financial) and others
***********************************************
bysort ind_id: egen temp = sum(inc_other_value) if inc_cat == "Rent"
bysort ind_id: egen inc_other_rent = mean(temp)
drop temp


* NET DIVESTMENT (financial assets)
***********************************
bysort ind_id: egen withdrawn_5701_t = sum(inc_other_value) if inlist(V9001, 5700101) 
bysort ind_id: egen withdrawn_5702_t = sum(inc_other_value) if inlist(V9001, 5700201)
bysort ind_id: egen withdrawn_5703_t = sum(inc_other_value) if inlist(V9001, 5700301)
bysort ind_id: egen withdrawn_5704_t = sum(inc_other_value) if inlist(V9001, 5700401)

bysort ind_id: egen deposit_5601_t = sum(inc_other_value) if inlist(V9001, 5600101) 
bysort ind_id: egen deposit_5602_t = sum(inc_other_value) if inlist(V9001, 5600201)
bysort ind_id: egen deposit_5603_t = sum(inc_other_value) if inlist(V9001, 5600301)
bysort ind_id: egen deposit_5604_t = sum(inc_other_value) if inlist(V9001, 5600401)

bysort ind_id: egen withdrawn_5701 = mean(withdrawn_5701_t)
bysort ind_id: egen withdrawn_5702 = mean(withdrawn_5702_t)
bysort ind_id: egen withdrawn_5703 = mean(withdrawn_5703_t)
bysort ind_id: egen withdrawn_5704 = mean(withdrawn_5704_t)

bysort ind_id: egen deposit_5601 = mean(deposit_5601_t)
bysort ind_id: egen deposit_5602 = mean(deposit_5602_t)
bysort ind_id: egen deposit_5603 = mean(deposit_5603_t)
bysort ind_id: egen deposit_5604 = mean(deposit_5604_t)

replace withdrawn_5701 = 0 if withdrawn_5701 == .
replace withdrawn_5702 = 0 if withdrawn_5702 == .
replace withdrawn_5703 = 0 if withdrawn_5703 == .
replace withdrawn_5704 = 0 if withdrawn_5704 == .

replace deposit_5601 = 0 if deposit_5601 == .
replace deposit_5602 = 0 if deposit_5602 == .
replace deposit_5603 = 0 if deposit_5603 == .
replace deposit_5604 = 0 if deposit_5604 == .

gen net_divestment_1 = withdrawn_5701 - deposit_5601 
gen net_divestment_2 = withdrawn_5702 - deposit_5602
gen net_divestment_3 = withdrawn_5703 - deposit_5603
gen net_divestment_4 = withdrawn_5704 - deposit_5604

replace net_divestment_1 = 0 if (withdrawn_5701 < deposit_5601)
replace net_divestment_2 = 0 if (withdrawn_5702 < deposit_5602)
replace net_divestment_3 = 0 if (withdrawn_5703 < deposit_5603)
replace net_divestment_4 = 0 if (withdrawn_5704 < deposit_5604)

gen inc_other_net_divestment = net_divestment_1 + net_divestment_2 + net_divestment_3 + net_divestment_4

drop withdrawn_* deposit_* net_divestment_*


* Deductions
************
bysort ind_id: egen exp_other_deductions = sum(exp_other_deduction_value)

bysort ind_id: egen temp = sum(exp_other_deduction_value) if inc_cat == "Other work income"
bysort ind_id: egen exp_other_work_deductions = mean(temp)
drop temp


* From missing to zero
drop inc_cat
ds inc_* exp_*
foreach var in `r(varlist)' {
  replace `var' = 0 if missing(`var')
}


* All categories
****************
gen inc_other = inc_other_work + inc_other_transf + inc_other_asset_sale + inc_other_borrowing + inc_other_rent + inc_other_net_divestment


* Wrap up and export
********************
label var inc_other                "Other income, all sources, individual, monthly average"
label var inc_other_work           "Other income, other work income, gross, individual, monthly average"
label var inc_other_transf         "Other income, transfers, gross, individual, monthly average"
label var inc_other_asset_sale     "Other income, asset sale, gross, individual, monthly average"
label var inc_other_borrowing      "Other income, borrowing, gross, individual, monthly average"
label var inc_other_rent           "Other income, rent, gross, individual, monthly average"
label var inc_other_net_divestment "Other income, net divestment, individual, monthly average"

label var exp_other_deductions      "Other expenses, all deductions, individual, monthly average"
label var exp_other_work_deductions "Other expenses, other work income, deductions, individual, monthly average"

global export_set ///
  fam_id ind_id inc_other ///
  inc_other_work inc_other_transf inc_other_asset_sale inc_other_borrowing inc_other_rent inc_other_net_divestment ///
  exp_other_deductions exp_other_work_deductions

keep  $export_set
order $export_set
sort  ind_id
duplicates drop
compress
describe
save "${analysis}/data/2_2_pof_outros_rendimento_summary.dta", replace


**********************************************************
* FROM INDIVIDUAL TO FAMILY INCOME (ex-domestic workers) *
**********************************************************
use                    "${analysis}/data/2_1_pof_attributes.dta", clear
merge 1:1 ind_id using "${analysis}/data/2_2_pof_rendimento_trabalho_summary.dta", nogenerate
merge 1:1 ind_id using "${analysis}/data/2_2_pof_outros_rendimento_summary.dta", nogenerate
drop if family_position == 900
keep fam_id ind_id inc_* exp_*
sort ind_id
describe


* Individual contribution to family income with net available income from work
******************************************************************************
ds inc_* exp_*
foreach var in `r(varlist)' {
  replace `var' = 0 if missing(`var')
}

gen ind_avg_winc = inc_work_main     + inc_work_sec     + inc_other_work ///
                 - exp_work_main_tax - exp_work_sec_tax - exp_other_work_deductions

label var ind_avg_winc "Individual net available work income, monthly average"


* Aggregate income and expenses at family level
***********************************************
ds inc_* exp_*
foreach var in `r(varlist)' {
  bysort fam_id: egen fam_`var' = sum(`var')
}


* Wrap up and export
********************
label var fam_inc_work_main             "Main job, gross income, family total, monthly average"
label var fam_inc_work_sec              "Secondary jobs, gross income, family total, monthly average"
label var fam_inc_other                 "Other income, all sources, family total, monthly average"
label var fam_inc_other_work            "Other income, other work income, gross, family total, monthly average"
label var fam_inc_other_transf          "Other income, transfers, gross, family total, monthly average"
label var fam_inc_other_asset_sale      "Other income, asset sale, gross, family total, monthly average"
label var fam_inc_other_borrowing       "Other income, borrowing, gross, family total, monthly average"
label var fam_inc_other_rent            "Other income, rent, gross, family total, monthly average"
label var fam_inc_other_net_divestment  "Other income, net divestment, family total, monthly average"

label var fam_exp_work_main_tax         "Main job, taxes, family total, monthly average"
label var fam_exp_work_sec_tax          "Secondary jobs, taxes, family total, monthly average"
label var fam_exp_other_deductions      "Other expenses, all deductions, family total, monthly average"
label var fam_exp_other_work_deductions "Other expenses, other work income, deductions, family total, monthly average"

keep fam_* ind_id ind_avg_winc
duplicates drop
compress
describe
save "${analysis}/data/2_2_pof_family_income.dta", replace


**************************************
*  FAMILY EXPENSES and FAMILY INCOME * 
**************************************
use                    "${analysis}/data/2_1_pof_attributes.dta", clear
merge m:1 ind_id using "${analysis}/data/2_2_pof_family_income.dta", nogenerate
merge m:1 fam_id using "${analysis}/data/2_2_pof_family_expenses.dta", nogenerate


* Final non monetary income for the household
*********************************************
gen       fam_inc_non_monet = fam_exp_non_monet + (fam_est_rent - fam_exp_domest_maint)
replace   fam_inc_non_monet = fam_exp_non_monet if fam_est_rent < fam_exp_domest_maint
label var fam_inc_non_monet "Non monetary income, family total, monthly average" 


* Final family income composition
*********************************
gen fam_inc_work = fam_inc_work_main	+ fam_inc_work_sec	+ fam_inc_other_work
label var fam_inc_work "Work gross income, all sources, family total, monthly average"

gen fam_inc_tot = fam_inc_work + fam_inc_other_transf	+ fam_inc_other_asset_sale	+ fam_inc_other_borrowing	///
                + fam_inc_other_rent	+ fam_inc_other_net_divestment	+ fam_inc_non_monet

label var fam_inc_tot "Total family income, monthly average" 


* Final family expenses composition
***********************************
gen fam_exp_housing = fam_est_rent + fam_exp_house
label var fam_exp_housing "Housing expenses, family total, monthly average"

replace fam_exp_misc = fam_exp_misc + fam_exp_other_deductions
label var fam_exp_misc "Other miscellaneous expenses and deductions, family total, monthly average"

gen fam_exp_tax_total = fam_exp_tax + fam_exp_work_tax + fam_exp_work_main_tax + fam_exp_work_sec_tax 
label var fam_exp_tax_total "Taxes, family total, monthly average"

gen fam_exp_tot = fam_exp_housing ///
                + fam_exp_cloth + fam_exp_educ + fam_exp_food + fam_exp_health + fam_exp_medicine + fam_exp_pers + fam_exp_transp + fam_exp_misc ///
                + fam_exp_inv + fam_exp_debt ///
                + fam_exp_tax_total

label var fam_exp_tot "Total family expenses, monthly average" 


* Family disposable income
**************************
gen fam_net_inc = fam_inc_tot - fam_exp_tax_total
label var fam_net_inc "Family income net of taxes, monthly average" 


* Family disposable income per capita
*************************************
gen fam_net_inc_pc = fam_net_inc / family_size
label var fam_net_inc_pc "Family net income per capita, monthly average" 


* Family disposable income per capita ex-me
*******************************************
gen fam_net_inc_pc_ex_me = (fam_net_inc - ind_avg_winc) / family_size
label var fam_net_inc_pc_ex_me "Family net income per capita ex-me, monthly average" 


* Family expenses per capita
****************************
gen fam_exp_pc = fam_exp_tot / family_size
label var fam_exp_pc "Family expenses per capita, monthly average" 


* Wrap up and export
********************
gen survey = "pof"
label var survey "Data source"

global export_set ///
  survey state region strata_id urban psu_id pweight hu_id fam_id ind_id ///
  work_state work_state_name work_state_ibge work_state_ibge_name occupation sector formal ///
  family_position male white race_gender years_age age years_educ educ attending_school ///
  health_plan no_credit_card no_current_account no_savings_account no_overdraft_facility ///
  winc ln_winc ln_winc_oaw ln_winc_employee ln_winc_employer ///
  main_job_gross_inc main_job_deductions main_job_work_hours main_job_full_time transit_time main_job_month ///
  other_work_inc_gross other_work_inc_tax ///
  family_size n_kids n_youngs n_adults n_seniors ///
  fam_inc_tot fam_inc_work fam_inc_other_transf fam_inc_other_asset_sale fam_inc_other_borrowing fam_inc_other_rent fam_inc_other_net_divestment fam_inc_non_monet ///
  fam_exp_tot fam_exp_housing fam_exp_cloth fam_exp_educ fam_exp_food fam_exp_health fam_exp_pers fam_exp_transp fam_exp_medicine fam_exp_misc fam_exp_inv fam_exp_debt fam_exp_tax_total ///
  fam_net_inc fam_net_inc_pc fam_net_inc_pc_ex_me ///
  gross_inc_total net_inc_pc /// 
  house_own house_rented house_other has_fridge has_tv has_pc has_car has_new_fridge has_new_tv has_new_pc has_new_car ///
  people_per_sleep_room piped_water_lack waste_lack energy_lack unpaved_street food_insec ///
  ends_meet good_cond_* bad_cond_* good_infr_* bad_infr_* house_* food_* ///
  rg_* fp_* age_* educ_* ae_* at_* region_*

keep  $export_set
order $export_set
sort ind_id
duplicates drop
compress
describe

save "${analysis}/data/2_2_pof_all.dta", replace


* Housekeeping
**************
erase "${analysis}/data/2_2_pof_family_income.dta"
erase "${analysis}/data/2_2_pof_outros_rendimento_summary.dta"
erase "${analysis}/data/2_2_pof_rendimento_trabalho_summary.dta"
erase "${analysis}/data/2_2_pof_family_expenses.dta"
erase "${analysis}/data/2_2_pof_aluguel_estimado_summary.dta"
erase "${analysis}/data/2_2_pof_despesa_individual_coletiva.dta"
erase "${analysis}/data/2_2_pof_despesa_caderneta_coletiva_summary.dta"
erase "${analysis}/data/2_1_pof_attributes.dta"


* Sanity check
**************
use "${analysis}/data/2_2_pof_all.dta", clear
drop if family_position == 900

* Drop individual specific data
keep ///
  region strata_id urban psu_id pweight fam_id ///
  fam_inc_tot fam_inc_work fam_inc_other_transf fam_inc_other_asset_sale fam_inc_other_borrowing fam_inc_other_rent fam_inc_other_net_divestment fam_inc_non_monet ///
  fam_exp_tot fam_exp_housing fam_exp_cloth fam_exp_educ fam_exp_food fam_exp_health fam_exp_pers fam_exp_transp fam_exp_medicine fam_exp_misc fam_exp_inv fam_exp_debt fam_exp_tax_total ///
  fam_net_inc gross_inc_total

duplicates drop

svyset psu_id [pweight = pweight], strata(strata_id) vce(linearized) singleunit(centered)  

* RENDIMENTO FAMILIAR TOTAL 5 426
* DESPESA FAMILIAR TOTAL    4 649
svy: mean fam_inc_tot fam_exp_tot

* BREAKDOWN OF INCOME
***********************
* TRABALHO                 3 118
* TRANSFERENCIAS PUBLICAS  1 057 = transf totais 1073 - 16 transf privada
* NAO MONETARIO              786
* RENT                       126 = aluguel 89 + outras rendas 37
* VARIACAO PATRIMONIAL       338 = divest 108 + asset sale 181 + borrowing 32 + 16 transf privada

svy: mean fam_inc_work fam_inc_other_transf fam_inc_other_asset_sale fam_inc_other_borrowing fam_inc_other_rent fam_inc_other_net_divestment fam_inc_non_monet

* BREAKDOWN OF EXPENSES
***********************

* HABITACAO           1 377
* TRANSPORTE            679
* ALIMENTACAO           658

svy: mean fam_exp_housing fam_exp_transp fam_exp_food

* ASSISTENCIA A SAUDE   302 = HEALTH OTHERS 154 + MEDICINE 148 
* EDUC                  272 = EDUCACAO 175 + RECREACAO E CULTURA 96
* PERSONAL EXPENSES     203 = SERVICOS PESSOAIS 48 + HIGIENE E CUIDADOS PESSOAIS 136 + FUMO 17
* VESTUARIO             160

svy: mean fam_exp_health fam_exp_medicine fam_exp_educ fam_exp_pers fam_exp_cloth

* TAX     379 = IMPOSTOS 217 + CONTRIBUICOES TRABALHISTAS 162
* MISC    221 = DESPESAS DIVERSAS 113 + PENSOES, MESADAS E DOACOES 40 + OUTRAS 69
* INVEST  200 = AUMENTO DO ATIVO 188 + PREVIDENCIA PRIVADA 11
* DEBT    197 = DIMINUICAO DO PASSIVO 150 + SERVICOS BANCARIOS 46

svy: mean fam_exp_tax_total fam_exp_misc fam_exp_inv fam_exp_debt


******************************************
* Subset to urban working age population *
******************************************
use "${analysis}/data/2_2_pof_all.dta", clear
svyset psu_id [pweight = pweight], strata(strata_id) singleunit(centered)

* How many rural?
svy: mean urban
mean urban

* Remove rural areas
drop if urban == 0

* How many outside working age in urban pop?
gen outside_work_age = (years_age < 14 | years_age > 64)
svy: mean outside_work_age
mean outside_work_age
drop outside_work_age

* Remove observations below working age
drop if years_age < 14 & !missing(years_age)

label define age 1 "14-24", modify

label var ae_1_1 "14-24, less than primary school"
label var ae_1_2 "14-24, primary school"
label var ae_1_3 "14-24, high school"
label var ae_1_4 "14-24, college or above"

* Remove observations above working age
drop if years_age > 64 & !missing(years_age)

drop age_6
drop fp_os

drop ae_6_1
drop ae_6_2
drop ae_6_3
drop ae_6_4

* Drop domestic workers living at their client's house
gen domestic = (family_position == "Domestic worker":family_position)
svy: mean domestic
mean domestic
drop if domestic == 1
drop domestic

* Drop workers with no reported work income
drop if work_state == "Own-account worker":work_state & missing(winc)
drop if work_state == "Employee":work_state & missing(winc)
drop if work_state == "Employer":work_state & missing(winc)


*************************
* Winsorize work income *
*************************
gen fweight = round(pweight)

* Winsorise available work income
sum winc [fweight = fweight], detail

scalar winc_low_bound = r(p1)
scalar winc_up_bound = r(p99)

replace winc = scalar(winc_low_bound) if winc < scalar(winc_low_bound) & !missing(winc)
replace winc = scalar(winc_up_bound)  if winc > scalar(winc_up_bound)  & !missing(winc)

sum winc [fweight = fweight], detail

* Winsorise log available work income
sum ln_winc [fweight = fweight], detail

scalar ln_winc_low_bound = r(p1)
scalar ln_winc_up_bound = r(p99)

replace ln_winc = scalar(ln_winc_low_bound) if ln_winc < scalar(ln_winc_low_bound) & !missing(ln_winc)
replace ln_winc = scalar(ln_winc_up_bound)  if ln_winc > scalar(ln_winc_up_bound)  & !missing(ln_winc)

sum ln_winc [fweight = fweight], detail

sum     ln_winc_oaw [fweight = fweight], detail
replace ln_winc_oaw = ln_winc if work_state_name == "Own-account worker" 
sum     ln_winc_oaw [fweight = fweight], detail

sum     ln_winc_employee [fweight = fweight], detail
replace ln_winc_employee = ln_winc if work_state_name == "Employee"
sum     ln_winc_employee [fweight = fweight], detail

sum     ln_winc_employer [fweight = fweight], detail
replace ln_winc_employer = ln_winc if work_state_name == "Employer"
sum     ln_winc_employer [fweight = fweight], detail

drop fweight


* Export main sub sample
************************
sort strata_id ind_id
duplicates drop
compress
describe

* How many households?
codebook hu_id

* How many families?
codebook fam_id

* How many individuals?
codebook ind_id

* Export
save "${analysis}/data/2_2_pof_clean.dta", replace


* End of script
***************
cap log close